# This script cleans generated tracks
# 1) Merges tracks split at months
# 2) Connects tracks that connect WC to AK-HI waypoints
# 3) Keeps interpolated AK-HI and original tracks...
# 4/2/2020

# C:\Python27\ArcGISx6410.5\python.exe merge_tracks_AK_HI_add_interp.py

# for use with anaconda
try:
    import archook #The module which locates arcgis
    archook.get_arcpy()
    import arcpy
except ImportError:
    error
    # do whatever you do if arcpy isnt there.

#import arcpy
import numpy
import datetime
import time
import os
import math

import multiprocessing
from functools import partial

import json

arcpy.env.overwriteOutput=True

#numpy.set_printoptions(threshold=numpy.nan)

spatial_reference = arcpy.SpatialReference(4326) # GCS_WGS_1984

### WEST COAST TRACKS OPTIONS #########################
# layers used in geoprocessing calculations
AKHI_file = r"H:\My Drive\Boats\ReplicationCode\data\port_buffers.gdb\AK_HI_ports"
ports_file = r"H:\My Drive\Boats\ReplicationCode\data\port_buffers.gdb\west_coast_ports_eca100_AK_HI_v3"
CAeca2011_file = r"H:\My Drive\Boats\ReplicationCode\data\eca_boundaries\CAeca_201112.shp" # using 2011 b/c generally broader
NAeca_file = r"H:\My Drive\Boats\ReplicationCode\data\eca_boundaries\NA_ECA_polygon.shp" 
#NAeca_file = r"G:\data\port_buffers.gdb\NA_ECA_polygon" 

#CAeca2009_file = r"G:\data\ECAboundaries\CAeca_200907.shp"

# location of tracks
lines_file_str = r"H:\My Drive\Boats\ReplicationCode\data\AIS\lines_v2\%s\lines%02d.gdb\westcoast5_lines_%s_%s" 

# lines to select
##line_selector = 'not VesselType=33 and not VesselType=34 and not VesselType=99 and not VesselType=52' # getting rid of tugs and dredges
#line_selector = 'MMSI=205543000 or MMSI=209526000'
#line_selector = 'MMSI>=366650200'
line_selector = '' 

# output file
cleaned_out_folder = r"H:\My Drive\Boats\ReplicationCode\data\AIS\lines_v2\merged_lines.gdb"
cleaned_out_file = r"cleaned_v2_%s"
#interp_out_file = r"interp_%s"
#cleaned_out = r"G:\data\temp2.gdb\cleaned_wAK_2009_3"

do_interp = True 
#############################################



# field to copy over (2009-2014)
flds_to_copy_0914 = ['IMO','CallSign','Name','VesselType','Length','Width','DimensionComponents']

# fields to copy over (2015-2016)
flds_to_copy_1516 = ['IMO','VesselName','VesselType','Length','Width']

months = range(1,13) 
years = range(2009,2017) # 2017)

## TESTING
##months = [1,2]
##years = [2009]




def main() :

    start = time.time()

    # Create output gdb if needed
    if not arcpy.Exists(cleaned_out_folder) == True :
        print "Create Output GDB"
        #print os.path.dirname(out_gdb) , os.path.basename(out_gdb)
        arcpy.CreateFileGDB_management( os.path.dirname(cleaned_out_folder) , os.path.basename(cleaned_out_folder) )

    # getting geometry for interp
    if do_interp :
        print("Loading Geometry for Interpolation")
        port_geom, port_ids, CAeca11_geom, NAeca_geom, AKHI_geom, AKHI_ids = get_wc_geom(ports_file,AKHI_file,CAeca2011_file,NAeca_file,spatial_reference) 
    
    # Looping through years
    for year in years :

        if year in [2009,2010,2011,2012,2013,2014] :
            flds_to_copy = flds_to_copy_0914 
        else:
            flds_to_copy = flds_to_copy_1516 
        
        cleaned_out = os.path.join( cleaned_out_folder , cleaned_out_file % (year) )
        #interp_out = os.path.join( cleaned_out_folder , interp_out_file % (year) )
        print cleaned_out    
                
        # merging all months in year
        print "putting monthly lines feature classes in memory"
        merge_list = [] ;
        for month in months :
            # location of points and other tables
            lines_m = "lines_%s" % month # name of month fc
            merge_list.append(lines_m)
            
            line_source = os.path.join(lines_file_str % (year,month,year,month) )
            arcpy.management.MakeFeatureLayer(line_source,lines_m,line_selector)

        print "merging month feature classes and sorting"
        lines_fc = os.path.join( "in_memory", "merged_%s" % (year) )
        lines_fc_unsort = os.path.join( "in_memory", "merged_tmp_%s" % (year) )
        arcpy.Merge_management(merge_list,lines_fc_unsort)
        arcpy.Sort_management(lines_fc_unsort, lines_fc,[["MMSI", "ASCENDING"], ["TrackStartTime", "ASCENDING"]])
        arcpy.Delete_management(lines_fc_unsort)
        
        cleaned_dir = "in_memory" #r"E:/data/temp.gdb"
        cleaned_name = r"cleantemp"
        cleaned_fc = os.path.join(cleaned_dir,cleaned_name)
                
        # create feature class for cleaned lines
        #arcpy.CreateFeatureclass_management(split_dir, split_name,geometry_type="POLYLINE",spatial_reference=spatial_reference,has_m = "ENABLED")
        arcpy.CreateFeatureclass_management(cleaned_dir, cleaned_name,geometry_type="POLYLINE" \
                                            ,spatial_reference=spatial_reference,has_m = "ENABLED",template=lines_fc)
        
        interp_dir = "in_memory" #r"E:/data/temp.gdb"
        interp_name = r"interptemp"
        interp_fc = os.path.join(interp_dir,interp_name)
        arcpy.CreateFeatureclass_management(interp_dir, interp_name,geometry_type="POLYLINE" \
                                            ,spatial_reference=spatial_reference,has_m = "ENABLED",template=lines_fc)

        # add fields
        arcpy.AddField_management(cleaned_fc,"OUT_CAECA","SHORT")
        arcpy.AddField_management(cleaned_fc,"OUT_NAECA","SHORT")
        arcpy.AddField_management(cleaned_fc,"INTERP_FLAG","SHORT")
        arcpy.AddField_management(cleaned_fc,"TRACK_ID","LONG")
        arcpy.AddField_management(cleaned_fc,"INTERP_TRACKS","TEXT",field_is_nullable="NULLABLE",field_length=350)
        #arcpy.AddField_management(cleaned_fc,"OUT_NAECA","SHORT")
        #arcpy.AddField_management(cleaned_fc,"TIME1","DATE")
        #arcpy.AddField_management(cleaned_fc,"TIME2","DATE")
        #arcpy.AddField_management(cleaned_fc,"RIGHT1","SHORT")
        #arcpy.AddField_management(cleaned_fc,"RIGHT2","SHORT")

        arcpy.AddField_management(interp_fc,"OUT_CAECA","SHORT")
        arcpy.AddField_management(interp_fc,"OUT_NAECA","SHORT")
        arcpy.AddField_management(interp_fc,"INTERP_FLAG","SHORT")
        arcpy.AddField_management(interp_fc,"INTERP_TRACKS","TEXT",field_is_nullable="NULLABLE",field_length=350)
        arcpy.AddField_management(interp_fc,"TRACK_ID","LONG",field_is_nullable="NULLABLE")


        cur = None
        try:
            # Open an insert cursor for the new feature class
            cur = arcpy.da.InsertCursor(cleaned_fc, ['SHAPE@','MMSI','TrackStartTime','TrackEndTime'] + flds_to_copy + ['OUT_CAECA','OUT_NAECA','INTERP_FLAG','TRACK_ID']  )
            #cur = arcpy.da.InsertCursor(cleaned_fc, ['SHAPE@','MMSI','TrackStartTime','TrackEndTime'] + flds_to_copy  )

            # looping through sorted list
            row_prev = None
            mmsi_prev = None
            add_flag = 0 
            track_id = 1 
            for row in arcpy.da.SearchCursor(lines_fc, ['SHAPE@','MMSI','TrackStartTime','TrackEndTime'] + flds_to_copy , \
                                              sql_clause=(None, 'ORDER BY MMSI, TrackStartTime') ,spatial_reference=spatial_reference ) :

                if mmsi_prev == None :
                    # this is first iteration
                    row_prev = row
                    mmsi_prev = row[1]
                    
                else :

                    add_flag = 0
                    # only loop through if shape exists
                    # this if will skip any records with missing geometry
                    if (not row[0]==None)  :
                        
                        mmsi = row[1] # get current lines mmsi

                        print(u'{0}, {1}; {2}, {3}'.format(mmsi_prev,row_prev[2],mmsi,row[2]))
                    
                        # check if same MMSI
                        if mmsi_prev == mmsi :

                            # check if previous track connects

                            # first check if lines are broken at a change in month
                            
                            # get time stamps for last point on previous track and first point on current track
                            tlast = datetime.datetime.utcfromtimestamp(row_prev[0].lastPoint.M)
                            tfirst = datetime.datetime.utcfromtimestamp(row[0].firstPoint.M)
                            
                            dt = tfirst - tlast
                            dt_hours = abs(dt.total_seconds() / 3600)

                            # if start and end are less than 1 hr apart and cross month
                            if dt_hours<1 and not (tfirst.month==tlast.month)  :
                                print "Merging due to month break"
                                ln_new = connect_lines(row_prev[0],row[0])
                                #row_prev = ( ln_new , row_prev[1], row_prev[2] , list(row_prev[3:]) )
                                row_prev = tuple( [ln_new] + [row[1]] + [row_prev[2],row[3]] + list(row_prev[4:]) )
                                outofECA = 0
                                outofNAECA = 0 
                                interpFlag = 0
                            
                            else : 
                                print "add"
                                add_flag = 1 
                                  
                        else : # mmsi nest 
                            print "add"
                            add_flag = 1 
                            
                        
                        if add_flag == 1 :
                            # add row_prev to table                              
                            cur.insertRow( list(row_prev) + [0,0,0] + [track_id]) # adding track (not interpolated)
                            track_id = track_id + 1 
                            #if add_interp == 1 : 
                            #    cur.insertRow( list(row_interp) + [outofECA,outofNAECA,1] ) # adding interpolated track
                                

                            # update previous row and mmsi rows list
                            row_prev = row
                            mmsi_prev = row[1]

                    else  :
                        print "skipped - missing geom"

                        # update interp flags
                        #interpFlag_prev = interpFlag
                        #outofECA_prev = outofECA
                        #outofNAECA_prev = outofNAECA
                        
            # need to add final row
                # if add flag==1 then will add current row b/c set above
                # if add flag==0 then will get merged track
            cur.insertRow( list(row_prev) + [0,0,0] + [track_id] )
            #cur.insertRow( list(row_prev) )   

        except Exception as e:
            print "failed in loop through lines"
            print(e)
        finally:
            # Cleanup the cursor if necessary
            if cur:
                del cur

        if do_interp:
            ###########################################################################    
            # Now loop through cleaned tracks and interpolate where applicable ######## 
            cur = None
            try:
                # Open an insert cursor for the new feature class
                cur = arcpy.da.InsertCursor(interp_fc, ['SHAPE@','MMSI','TrackStartTime','TrackEndTime','TRACK_ID'] + flds_to_copy + ['OUT_CAECA','OUT_NAECA','INTERP_FLAG','INTERP_TRACKS']  )

                # looping through sorted list
                row_prev = None
                mmsi_prev = None
                interp_flag = 0 
                interp_list = [] 
                add_flag = 0 
                for row in arcpy.da.SearchCursor(cleaned_fc, ['SHAPE@','MMSI','TrackStartTime','TrackEndTime','TRACK_ID'] + flds_to_copy , \
                                                  sql_clause=(None, 'ORDER BY MMSI, TrackStartTime') ,spatial_reference=spatial_reference ) :

                    if mmsi_prev == None :
                        # this is first iteration
                        row_prev = row
                        mmsi_prev = row[1]
                        
                    else :
                        # only loop through if shape exists
                        # this if will skip any records with missing geometry
                        if (not row[0]==None)  :
                            
                            mmsi = row[1] # get current lines mmsi

                            print(u'{0}, {1}; {2}, {3}'.format(mmsi_prev,row_prev[2],mmsi,row[2]))
                        
                            # check if same MMSI
                            if mmsi_prev == mmsi :
                                # check if AK HI connection add interpolated track to other layer
                                AKHI_flag , outofECA , outofNAECA = check_AKHI_connect(row,row_prev,AKHI_geom,AKHI_ids,port_geom,port_ids,CAeca11_geom,NAeca_geom)
                                if AKHI_flag :
                                    print "Interpolated AK HI connection"
                                    ln_new = connect_lines(row_prev[0],row[0])
                                    #row_prev = ( ln_new , row_prev[1], row_prev[2] , row_prev[3:] )
                                    
                                    # add track id to interpolated list
                                    interp_list.append(row_prev[4])
                                    interp_list.append(row[4])
                                    interp_list = filter(lambda a: a != 0, interp_list)
                                    print(interp_list)
                                    
                                    row_prev = tuple( [ln_new] + [row[1]] + [row_prev[2],row[3],0] + list(row_prev[5:]) )
                                    interp_flag = 1 
                                
                                else : # if no ak/hi connection
                                     if interp_flag == 1 :
                                         # if row_prev is interpolated then store
                                         print "add"
                                         interp_list_str = ','.join(map(str, interp_list))
                                         print interp_list_str
                                         cur.insertRow( list(row_prev) + [outofECA,outofNAECA,1,interp_list_str] ) # adding interpolated track 
                                         interp_flag = 0 
                                         interp_list = [] 
                                     row_prev = row
                                     
                            else : # if different MMSI
                                if interp_flag == 1 :
                                    # if row_prev is interpolated then store
                                    print "add"
                                    interp_list_str = ','.join(map(str, interp_list))
                                    print interp_list_str
                                    cur.insertRow( list(row_prev) + [outofECA,outofNAECA,1,interp_list_str] ) # adding interpolated track 
                                    interp_flag = 0
                                    interp_list = [] 
                                    
                                row_prev = row
                                     
                    mmsi_prev = row[1]
     
                # this is for last track
                if interp_flag==1 :
                    interp_list_str = ','.join(map(str, interp_list))
                    print interp_list_str
                    cur.insertRow( list(row_prev) + [outofECA,outofNAECA,1,interp_list_str] ) # adding interpolated track    
                                
            except Exception as e:
                print "failed in loop through lines"
                print(e)
            finally:
                # Cleanup the cursor if necessary
                if cur:
                    del cur
            
            # fc to merge
            clean_interp_merge_list = [cleaned_fc,interp_fc]
        
        else:
            clean_interp_merge_list = [cleaned_fc]

        # copying fc in memory to gdb
        #arcpy.CopyFeatures_management(cleaned_fc,cleaned_out) 
        #arcpy.CopyFeatures_management(interp_fc,interp_out) 
        
        # merge and save
        arcpy.Merge_management(clean_interp_merge_list,cleaned_out)

        print("--- %s seconds ---" % (time.time() - start))


def get_wc_geom(ports_file,AKHI_file,CAeca2011_file,NAeca_file,spatial_reference) :
	
    # create port geometries
    port_selector = "PORT_ID<50" # only using ports not other waypoints
    arcpy.management.MakeFeatureLayer(ports_file,"portbound_int_fc",port_selector)
    #port_geom = arcpy.CopyFeatures_management("portbound_int_fc",arcpy.Geometry())[0]

    port_geom = []
    port_ids = [] 
    for row in arcpy.da.SearchCursor("portbound_int_fc", ["SHAPE@","PORT_ID"],spatial_reference=spatial_reference) :
        port_geom.append(row[0])
        port_ids.append(row[1])          
    del row

    # create CA ECA geometries
    CAeca11_geom = arcpy.CopyFeatures_management(CAeca2011_file,arcpy.Geometry())[0]

    # create NA ECA geometries
    NAeca_geom = arcpy.CopyFeatures_management(NAeca_file,arcpy.Geometry())[0]

    # create list of geometries for AK and HI ports
    arcpy.management.MakeFeatureLayer(AKHI_file,"AKHI_fc")
    AKHI_geom = []
    AKHI_ids = []
    for row in arcpy.da.SearchCursor("AKHI_fc", ["SHAPE@","PORT_ID"],spatial_reference=spatial_reference) :
        AKHI_geom.append(row[0])
        AKHI_ids.append(row[1])          
    del row
    
    return port_geom, port_ids, CAeca11_geom, NAeca_geom, AKHI_geom, AKHI_ids

def get_int(ln,geom) :
# this function loops through geometry and determines if there is intersection with any of the geometries
    PORTS = len(geom)

    int_points = []
    for port_i in range(0,PORTS) :
        port_line = geom[port_i]
        
        if not ln.disjoint(port_line) :
            int_points.append(port_i)
            #int_ind.append(port_i)

    return int_points

# this version stores intersecting point
def get_int_point(ln,AKHI_geom,AKHI_ids) :
    PORTS = len(AKHI_ids)

    int_points = []
    int_ind = []

    for port_i in range(0,PORTS) :
        port_line = AKHI_geom[port_i]
        
        if not ln.disjoint(port_line) :
            #print AKHI_ids[port_i]
            int_pnt = ln.intersect(port_line, 1)
            int_points.append(int_pnt)
            int_ind.append(port_i)

    #print int_points

    return int_points , int_ind
            #dist_first_tmp = ptfirst.distanceTo(port_geom[port_i])

def check_AKHI_connect(row,row_prev,AKHI_geom,AKHI_ids,port_geom,port_ids,CAeca11_geom,NAeca_geom) :
    merge_flag = False
    outofECA = 0
    outofNAECA = 0 

    int_points = get_int(row[0],AKHI_geom)
    int_points_prev = get_int(row_prev[0],AKHI_geom)

    if int_points or int_points_prev :
        # only continuing if one of lines intersected
        print "one line intersects AK-HI********************"

        ports_cur = get_int(row[0],port_geom)
        ports_prev = get_int(row_prev[0],port_geom)

#        ports_prev = not row_prev[0].disjoint(port_geom)
#        ports_cur = not row[0].disjoint(port_geom)

        #print ports_prev
        #print ports_cur
        
        # checking if prev and current intersect with each of AK-HI or ports
        if (int_points and ports_prev ) or (int_points_prev and ports_cur ) :
            print "other line intersects ports********************"

            
            # calculating time and distance gap
            pnt_start = row_prev[0].lastPoint 
            pnt_end = row[0].firstPoint 
            tstart = datetime.datetime.utcfromtimestamp(pnt_start.M)
            tend = datetime.datetime.utcfromtimestamp(pnt_end.M)
            dt = tend - tstart 
            dt_hours = abs(dt.total_seconds() / 3600) + 1e-9

            dist_km = GetDistance(pnt_start.X,pnt_start.Y,pnt_end.X,pnt_end.Y)

            # checking speed
            speed_imp = dist_km / dt_hours
            print(u'Time: {0}, Dist: {1}, Speed: {2}'.format(dt_hours,dist_km,speed_imp))

            if speed_imp>15 and speed_imp<45:
                merge_flag=True

                # check if port track extends out of ECA
                if ports_prev :
                    # check end point of previous track b/c previous track intersects ports
                    if not CAeca11_geom.contains(arcpy.PointGeometry(row_prev[0].lastPoint,spatial_reference)) :
                        outofECA = 1
                    if not NAeca_geom.contains(arcpy.PointGeometry(row_prev[0].lastPoint,spatial_reference)) :
                        outofNAECA = 1 

                else:
                    # must be other case, so check first point on current track
                    if not CAeca11_geom.contains(arcpy.PointGeometry(row[0].firstPoint,spatial_reference)) :
                        outofECA = 1

                    if not NAeca_geom.contains(arcpy.PointGeometry(row[0].firstPoint,spatial_reference)) :
                        outofNAECA = 1
                
    return merge_flag, outofECA, outofNAECA
    

def connect_lines(ln_prev,ln) :
    # connects two lines
    # conversts lines to json, combines paths, then creates new geometry
    
    json_prev = json.loads(ln_prev.JSON)
    json_cur = json.loads(ln.JSON)

    paths_new = json_prev['paths'][0]
    #print len(paths_new)
    #print len( json_cur['paths'][0] )

    paths_new.extend(json_cur['paths'][0])
    #print len(paths_new)

    json_merged = json_prev # create new json array
    json_merged['paths'] = [paths_new]

    ln_new = arcpy.AsShape(json_merged, True)

    return ln_new

def GetDistance(prevX,prevY,currX,currY):
    """Calculates the distance between two latitude/longitude coordinate pairs, using the Vincenty formula for ellipsoid based distances.
    Returns the distance in km."""
    #Vincenty Formula
    #Copyright 2002-2012 Chris Veness
    #http://www.movable-type.co.uk/scripts/latlong-vincenty.html
     
    a = 6378137
    b = 6356752.314245
    f = 1/298.257223563

    L = math.radians(currX - prevX)
    U1 = math.atan((1-f)*math.tan(math.radians(prevY)))
    U2 = math.atan((1-f)*math.tan(math.radians(currY)))
    sinU1 = math.sin(U1)
    sinU2 = math.sin(U2)
    cosU1 = math.cos(U1)
    cosU2 = math.cos(U2)

    lam = L
    lamP = 9999999999
    iter_count = 0
    while abs(lam-lamP) > 1e-12 and iter_count <= 100:
        sinLam = math.sin(lam)
        cosLam = math.cos(lam)
        sinSigma = math.sqrt((cosU2*sinLam)*(cosU2*sinLam) + (cosU1*sinU2-sinU1*cosU2*cosLam)*(cosU1*sinU2-sinU1*cosU2*cosLam))

        if sinSigma == 0:
            return 0

        cosSigma = sinU1*sinU2+cosU1*cosU2*cosLam
        sigma = math.atan2(sinSigma,cosSigma)
          
        sinAlpha = cosU1*cosU2*sinLam/sinSigma
        cosSqAlpha = 1 - sinAlpha*sinAlpha
        if cosSqAlpha == 0: #catches zero division error if points on equator
            cos2SigmaM = 0
        else:
            cos2SigmaM = cosSigma - 2*sinU1*sinU2/cosSqAlpha

        if cos2SigmaM == None:
            cos2SigmaM = 0

        C = f/16*cosSqAlpha*(4+f*(4-3*cosSqAlpha))
        lamP = lam
        lam = L+(1-C)*f*sinAlpha*(sigma + C*sinSigma*(cos2SigmaM+C*cosSigma*(-1+2*cos2SigmaM*cos2SigmaM)))

        iter_count+=1

    uSq = cosSqAlpha*(a*a - b*b)/(b*b)
    A = 1 + uSq/16384*(4096+uSq*(-768+uSq*(320-175*uSq)))
    B = uSq/1024*(256+uSq*(-128+uSq*(74-47*uSq)))
    deltaSigma = B*sinSigma*(cos2SigmaM+B/4*(cosSigma*(-1+2*cos2SigmaM*cos2SigmaM)-B/6*cos2SigmaM*(-3+4*sinSigma*sinSigma)*(-3+4*cos2SigmaM*cos2SigmaM)))
    s = b*A*(sigma-deltaSigma)

    #convert s in meters to miles
    #s_miles = s*0.0006213712
    #return s_miles

    s_km = s/1000
    return s_km


if __name__ == "__main__" :
    main() 





